Here, we’re just setting a few options.
knitr::opts_chunk$set(
warning = TRUE, # show warnings during codebook generation
message = TRUE, # show messages during codebook generation
error = TRUE, # do not interrupt codebook generation in case of errors,
# usually better for debugging
echo = TRUE # show R code
)
ggplot2::theme_set(ggplot2::theme_bw())
pander::panderOptions("table.split.table", Inf)
Now, we’re preparing our data for the codebook.
library(codebook)
library(here)
## here() starts at C:/Users/eirin/Documents/MPI Psycholinguistics/Teaching/IMPRSConf2020
library(readr)
codebook_data <- read_csv(here("raw-data", "2019.csv"))
## Parsed with column specification:
## cols(
## `Overall rank` = col_double(),
## `Country or region` = col_character(),
## Score = col_double(),
## `GDP per capita` = col_double(),
## `Social support` = col_double(),
## `Healthy life expectancy` = col_double(),
## `Freedom to make life choices` = col_double(),
## Generosity = col_double(),
## `Perceptions of corruption` = col_double()
## )
### I have added a name to this dataset as metadata
metadata(codebook_data)$name <- "World Happiness reports data"
### practice!
### add temporal coverage metadata to this codebook
### take a look at the vignette if you're not sure how
# omit the following lines, if your missing values are already properly labelled
codebook_data <- detect_missing(codebook_data,
only_labelled = TRUE, # only labelled values are autodetected as missing
negative_values_are_missing = FALSE, # negative values are missing values
ninety_nine_problems = TRUE, # 99/999 are missing values, if they are more than 5 MAD from the median
)
# The following line finds item aggregates with names like this:
# scale = scale_1 + scale_2R + scale_3R
# identifying these aggregates allows the codebook function to
# automatically compute reliabilities.
# However, it will not reverse items automatically.
codebook_data <- detect_scales(codebook_data)
Create codebook
codebook(codebook_data)
## No missing values.
knitr::asis_output(data_info)
if (exists("name", meta)) {
glue::glue(
"__Dataset name__: {name}",
.envir = meta)
}
Dataset name: World Happiness reports data
cat(description)
The dataset has N=156 rows and 9 columns. 156 rows have no missing values on any column.
Metadata for search engines
meta <- meta[setdiff(names(meta),
c("creator", "datePublished", "identifier",
"url", "citation", "spatialCoverage",
"temporalCoverage", "description", "name"))]
pander::pander(meta)
knitr::asis_output(survey_overview)
if (detailed_variables || detailed_scales) {
knitr::asis_output(paste0(scales_items, sep = "\n\n\n", collapse = "\n\n\n"))
}
show_missing_values <- FALSE
if (has_labels(item)) {
missing_values <- item[is.na(haven::zap_missing(item))]
attributes(missing_values) <- attributes(item)
if (!is.null(attributes(item)$labels)) {
attributes(missing_values)$labels <- attributes(missing_values)$labels[is.na(attributes(missing_values)$labels)]
attributes(item)$labels <- attributes(item)$labels[!is.na(attributes(item)$labels)]
}
if (is.double(item)) {
show_missing_values <- length(unique(haven::na_tag(missing_values))) > 1
item <- haven::zap_missing(item)
}
if (length(item_attributes$labels) == 0 && is.numeric(item)) {
item <- haven::zap_labels(item)
}
}
item_nomiss <- item[!is.na(item)]
# unnest mc_multiple and so on
if (
is.character(item_nomiss) &&
any(stringr::str_detect(item_nomiss, stringr::fixed(", "))) &&
!is.null(item_info) &&
(exists("type", item_info) &&
any(stringr::str_detect(item_info$type,
pattern = stringr::fixed("multiple"))))
) {
item_nomiss <- unlist(stringr::str_split(item_nomiss, pattern = stringr::fixed(", ")))
}
attributes(item_nomiss) <- attributes(item)
old_height <- knitr::opts_chunk$get("fig.height")
non_missing_choices <- item_attributes[["labels"]]
many_labels <- length(non_missing_choices) > 7
go_vertical <- !is_numeric_or_time_var(item_nomiss) || many_labels
if ( go_vertical ) {
# numeric items are plotted horizontally (because that's what usually expected)
# categorical items are plotted vertically because we can use the screen real estate better this way
if (is.null(choices) ||
dplyr::n_distinct(item_nomiss) > length(non_missing_choices)) {
non_missing_choices <- unique(item_nomiss)
names(non_missing_choices) <- non_missing_choices
}
choice_multiplier <- old_height/6.5
new_height <- 2 + choice_multiplier * length(non_missing_choices)
new_height <- ifelse(new_height > 20, 20, new_height)
new_height <- ifelse(new_height < 1, 1, new_height)
if(could_disclose_unique_values(item_nomiss) && is.character(item_nomiss)) {
new_height <- old_height
}
knitr::opts_chunk$set(fig.height = new_height)
}
wrap_at <- knitr::opts_chunk$get("fig.width") * 10
# todo: if there are free-text choices mingled in with the pre-defined ones, don't show
# todo: show rare items if they are pre-defined
# todo: bin rare responses into "other category"
if (!length(item_nomiss)) {
cat("No non-missing values to show.")
} else if (!could_disclose_unique_values(item_nomiss)) {
plot_labelled(item_nomiss, item_name, wrap_at, go_vertical)
} else {
if (is.character(item_nomiss)) {
char_count <- stringr::str_count(item_nomiss)
attributes(char_count)$label <- item_label
plot_labelled(char_count,
item_name, wrap_at, FALSE, trans = "log1p", "characters")
} else {
cat(dplyr::n_distinct(item_nomiss), " unique, categorical values, so not shown.")
}
}
knitr::opts_chunk$set(fig.height = old_height)
0 missing values.
attributes(item) <- item_attributes
df = data.frame(item, stringsAsFactors = FALSE)
names(df) = html_item_name
escaped_table(codebook_table(df))
| name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
|---|---|---|---|---|---|---|---|---|---|---|
| Overall rank | numeric | 0 | 1 | 1 | 78 | 156 | 78.5 | 45.17743 | ▇▇▇▇▇ | NA |
if (show_missing_values) {
plot_labelled(missing_values, item_name, wrap_at)
}
if (!is.null(item_info)) {
# don't show choices again, if they're basically same thing as value labels
if (!is.null(choices) && !is.null(item_info$choices) &&
all(names(na.omit(choices)) == item_info$choices) &&
all(na.omit(choices) == names(item_info$choices))) {
item_info$choices <- NULL
}
item_info$label_parsed <-
item_info$choice_list <- item_info$study_id <- item_info$id <- NULL
pander::pander(item_info)
}
if (!is.null(choices) && length(choices) && length(choices) < 30) {
pander::pander(as.list(choices))
}
show_missing_values <- FALSE
if (has_labels(item)) {
missing_values <- item[is.na(haven::zap_missing(item))]
attributes(missing_values) <- attributes(item)
if (!is.null(attributes(item)$labels)) {
attributes(missing_values)$labels <- attributes(missing_values)$labels[is.na(attributes(missing_values)$labels)]
attributes(item)$labels <- attributes(item)$labels[!is.na(attributes(item)$labels)]
}
if (is.double(item)) {
show_missing_values <- length(unique(haven::na_tag(missing_values))) > 1
item <- haven::zap_missing(item)
}
if (length(item_attributes$labels) == 0 && is.numeric(item)) {
item <- haven::zap_labels(item)
}
}
item_nomiss <- item[!is.na(item)]
# unnest mc_multiple and so on
if (
is.character(item_nomiss) &&
any(stringr::str_detect(item_nomiss, stringr::fixed(", "))) &&
!is.null(item_info) &&
(exists("type", item_info) &&
any(stringr::str_detect(item_info$type,
pattern = stringr::fixed("multiple"))))
) {
item_nomiss <- unlist(stringr::str_split(item_nomiss, pattern = stringr::fixed(", ")))
}
attributes(item_nomiss) <- attributes(item)
old_height <- knitr::opts_chunk$get("fig.height")
non_missing_choices <- item_attributes[["labels"]]
many_labels <- length(non_missing_choices) > 7
go_vertical <- !is_numeric_or_time_var(item_nomiss) || many_labels
if ( go_vertical ) {
# numeric items are plotted horizontally (because that's what usually expected)
# categorical items are plotted vertically because we can use the screen real estate better this way
if (is.null(choices) ||
dplyr::n_distinct(item_nomiss) > length(non_missing_choices)) {
non_missing_choices <- unique(item_nomiss)
names(non_missing_choices) <- non_missing_choices
}
choice_multiplier <- old_height/6.5
new_height <- 2 + choice_multiplier * length(non_missing_choices)
new_height <- ifelse(new_height > 20, 20, new_height)
new_height <- ifelse(new_height < 1, 1, new_height)
if(could_disclose_unique_values(item_nomiss) && is.character(item_nomiss)) {
new_height <- old_height
}
knitr::opts_chunk$set(fig.height = new_height)
}
wrap_at <- knitr::opts_chunk$get("fig.width") * 10
# todo: if there are free-text choices mingled in with the pre-defined ones, don't show
# todo: show rare items if they are pre-defined
# todo: bin rare responses into "other category"
if (!length(item_nomiss)) {
cat("No non-missing values to show.")
} else if (!could_disclose_unique_values(item_nomiss)) {
plot_labelled(item_nomiss, item_name, wrap_at, go_vertical)
} else {
if (is.character(item_nomiss)) {
char_count <- stringr::str_count(item_nomiss)
attributes(char_count)$label <- item_label
plot_labelled(char_count,
item_name, wrap_at, FALSE, trans = "log1p", "characters")
} else {
cat(dplyr::n_distinct(item_nomiss), " unique, categorical values, so not shown.")
}
}
knitr::opts_chunk$set(fig.height = old_height)
0 missing values.
attributes(item) <- item_attributes
df = data.frame(item, stringsAsFactors = FALSE)
names(df) = html_item_name
escaped_table(codebook_table(df))
| name | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace | label |
|---|---|---|---|---|---|---|---|---|---|
| Country or region | character | 0 | 1 | 156 | 0 | 4 | 24 | 0 | NA |
if (show_missing_values) {
plot_labelled(missing_values, item_name, wrap_at)
}
if (!is.null(item_info)) {
# don't show choices again, if they're basically same thing as value labels
if (!is.null(choices) && !is.null(item_info$choices) &&
all(names(na.omit(choices)) == item_info$choices) &&
all(na.omit(choices) == names(item_info$choices))) {
item_info$choices <- NULL
}
item_info$label_parsed <-
item_info$choice_list <- item_info$study_id <- item_info$id <- NULL
pander::pander(item_info)
}
if (!is.null(choices) && length(choices) && length(choices) < 30) {
pander::pander(as.list(choices))
}
show_missing_values <- FALSE
if (has_labels(item)) {
missing_values <- item[is.na(haven::zap_missing(item))]
attributes(missing_values) <- attributes(item)
if (!is.null(attributes(item)$labels)) {
attributes(missing_values)$labels <- attributes(missing_values)$labels[is.na(attributes(missing_values)$labels)]
attributes(item)$labels <- attributes(item)$labels[!is.na(attributes(item)$labels)]
}
if (is.double(item)) {
show_missing_values <- length(unique(haven::na_tag(missing_values))) > 1
item <- haven::zap_missing(item)
}
if (length(item_attributes$labels) == 0 && is.numeric(item)) {
item <- haven::zap_labels(item)
}
}
item_nomiss <- item[!is.na(item)]
# unnest mc_multiple and so on
if (
is.character(item_nomiss) &&
any(stringr::str_detect(item_nomiss, stringr::fixed(", "))) &&
!is.null(item_info) &&
(exists("type", item_info) &&
any(stringr::str_detect(item_info$type,
pattern = stringr::fixed("multiple"))))
) {
item_nomiss <- unlist(stringr::str_split(item_nomiss, pattern = stringr::fixed(", ")))
}
attributes(item_nomiss) <- attributes(item)
old_height <- knitr::opts_chunk$get("fig.height")
non_missing_choices <- item_attributes[["labels"]]
many_labels <- length(non_missing_choices) > 7
go_vertical <- !is_numeric_or_time_var(item_nomiss) || many_labels
if ( go_vertical ) {
# numeric items are plotted horizontally (because that's what usually expected)
# categorical items are plotted vertically because we can use the screen real estate better this way
if (is.null(choices) ||
dplyr::n_distinct(item_nomiss) > length(non_missing_choices)) {
non_missing_choices <- unique(item_nomiss)
names(non_missing_choices) <- non_missing_choices
}
choice_multiplier <- old_height/6.5
new_height <- 2 + choice_multiplier * length(non_missing_choices)
new_height <- ifelse(new_height > 20, 20, new_height)
new_height <- ifelse(new_height < 1, 1, new_height)
if(could_disclose_unique_values(item_nomiss) && is.character(item_nomiss)) {
new_height <- old_height
}
knitr::opts_chunk$set(fig.height = new_height)
}
wrap_at <- knitr::opts_chunk$get("fig.width") * 10
# todo: if there are free-text choices mingled in with the pre-defined ones, don't show
# todo: show rare items if they are pre-defined
# todo: bin rare responses into "other category"
if (!length(item_nomiss)) {
cat("No non-missing values to show.")
} else if (!could_disclose_unique_values(item_nomiss)) {
plot_labelled(item_nomiss, item_name, wrap_at, go_vertical)
} else {
if (is.character(item_nomiss)) {
char_count <- stringr::str_count(item_nomiss)
attributes(char_count)$label <- item_label
plot_labelled(char_count,
item_name, wrap_at, FALSE, trans = "log1p", "characters")
} else {
cat(dplyr::n_distinct(item_nomiss), " unique, categorical values, so not shown.")
}
}
knitr::opts_chunk$set(fig.height = old_height)
0 missing values.
attributes(item) <- item_attributes
df = data.frame(item, stringsAsFactors = FALSE)
names(df) = html_item_name
escaped_table(codebook_table(df))
| name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
|---|---|---|---|---|---|---|---|---|---|---|
| Score | numeric | 0 | 1 | 2.9 | 5.4 | 7.8 | 5.407096 | 1.11312 | ▂▇▇▇▃ | NA |
if (show_missing_values) {
plot_labelled(missing_values, item_name, wrap_at)
}
if (!is.null(item_info)) {
# don't show choices again, if they're basically same thing as value labels
if (!is.null(choices) && !is.null(item_info$choices) &&
all(names(na.omit(choices)) == item_info$choices) &&
all(na.omit(choices) == names(item_info$choices))) {
item_info$choices <- NULL
}
item_info$label_parsed <-
item_info$choice_list <- item_info$study_id <- item_info$id <- NULL
pander::pander(item_info)
}
if (!is.null(choices) && length(choices) && length(choices) < 30) {
pander::pander(as.list(choices))
}
show_missing_values <- FALSE
if (has_labels(item)) {
missing_values <- item[is.na(haven::zap_missing(item))]
attributes(missing_values) <- attributes(item)
if (!is.null(attributes(item)$labels)) {
attributes(missing_values)$labels <- attributes(missing_values)$labels[is.na(attributes(missing_values)$labels)]
attributes(item)$labels <- attributes(item)$labels[!is.na(attributes(item)$labels)]
}
if (is.double(item)) {
show_missing_values <- length(unique(haven::na_tag(missing_values))) > 1
item <- haven::zap_missing(item)
}
if (length(item_attributes$labels) == 0 && is.numeric(item)) {
item <- haven::zap_labels(item)
}
}
item_nomiss <- item[!is.na(item)]
# unnest mc_multiple and so on
if (
is.character(item_nomiss) &&
any(stringr::str_detect(item_nomiss, stringr::fixed(", "))) &&
!is.null(item_info) &&
(exists("type", item_info) &&
any(stringr::str_detect(item_info$type,
pattern = stringr::fixed("multiple"))))
) {
item_nomiss <- unlist(stringr::str_split(item_nomiss, pattern = stringr::fixed(", ")))
}
attributes(item_nomiss) <- attributes(item)
old_height <- knitr::opts_chunk$get("fig.height")
non_missing_choices <- item_attributes[["labels"]]
many_labels <- length(non_missing_choices) > 7
go_vertical <- !is_numeric_or_time_var(item_nomiss) || many_labels
if ( go_vertical ) {
# numeric items are plotted horizontally (because that's what usually expected)
# categorical items are plotted vertically because we can use the screen real estate better this way
if (is.null(choices) ||
dplyr::n_distinct(item_nomiss) > length(non_missing_choices)) {
non_missing_choices <- unique(item_nomiss)
names(non_missing_choices) <- non_missing_choices
}
choice_multiplier <- old_height/6.5
new_height <- 2 + choice_multiplier * length(non_missing_choices)
new_height <- ifelse(new_height > 20, 20, new_height)
new_height <- ifelse(new_height < 1, 1, new_height)
if(could_disclose_unique_values(item_nomiss) && is.character(item_nomiss)) {
new_height <- old_height
}
knitr::opts_chunk$set(fig.height = new_height)
}
wrap_at <- knitr::opts_chunk$get("fig.width") * 10
# todo: if there are free-text choices mingled in with the pre-defined ones, don't show
# todo: show rare items if they are pre-defined
# todo: bin rare responses into "other category"
if (!length(item_nomiss)) {
cat("No non-missing values to show.")
} else if (!could_disclose_unique_values(item_nomiss)) {
plot_labelled(item_nomiss, item_name, wrap_at, go_vertical)
} else {
if (is.character(item_nomiss)) {
char_count <- stringr::str_count(item_nomiss)
attributes(char_count)$label <- item_label
plot_labelled(char_count,
item_name, wrap_at, FALSE, trans = "log1p", "characters")
} else {
cat(dplyr::n_distinct(item_nomiss), " unique, categorical values, so not shown.")
}
}
knitr::opts_chunk$set(fig.height = old_height)
0 missing values.
attributes(item) <- item_attributes
df = data.frame(item, stringsAsFactors = FALSE)
names(df) = html_item_name
escaped_table(codebook_table(df))
| name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
|---|---|---|---|---|---|---|---|---|---|---|
| GDP per capita | numeric | 0 | 1 | 0 | 0.96 | 1.7 | 0.9051474 | 0.3983895 | ▃▅▇▇▃ | NA |
if (show_missing_values) {
plot_labelled(missing_values, item_name, wrap_at)
}
if (!is.null(item_info)) {
# don't show choices again, if they're basically same thing as value labels
if (!is.null(choices) && !is.null(item_info$choices) &&
all(names(na.omit(choices)) == item_info$choices) &&
all(na.omit(choices) == names(item_info$choices))) {
item_info$choices <- NULL
}
item_info$label_parsed <-
item_info$choice_list <- item_info$study_id <- item_info$id <- NULL
pander::pander(item_info)
}
if (!is.null(choices) && length(choices) && length(choices) < 30) {
pander::pander(as.list(choices))
}
show_missing_values <- FALSE
if (has_labels(item)) {
missing_values <- item[is.na(haven::zap_missing(item))]
attributes(missing_values) <- attributes(item)
if (!is.null(attributes(item)$labels)) {
attributes(missing_values)$labels <- attributes(missing_values)$labels[is.na(attributes(missing_values)$labels)]
attributes(item)$labels <- attributes(item)$labels[!is.na(attributes(item)$labels)]
}
if (is.double(item)) {
show_missing_values <- length(unique(haven::na_tag(missing_values))) > 1
item <- haven::zap_missing(item)
}
if (length(item_attributes$labels) == 0 && is.numeric(item)) {
item <- haven::zap_labels(item)
}
}
item_nomiss <- item[!is.na(item)]
# unnest mc_multiple and so on
if (
is.character(item_nomiss) &&
any(stringr::str_detect(item_nomiss, stringr::fixed(", "))) &&
!is.null(item_info) &&
(exists("type", item_info) &&
any(stringr::str_detect(item_info$type,
pattern = stringr::fixed("multiple"))))
) {
item_nomiss <- unlist(stringr::str_split(item_nomiss, pattern = stringr::fixed(", ")))
}
attributes(item_nomiss) <- attributes(item)
old_height <- knitr::opts_chunk$get("fig.height")
non_missing_choices <- item_attributes[["labels"]]
many_labels <- length(non_missing_choices) > 7
go_vertical <- !is_numeric_or_time_var(item_nomiss) || many_labels
if ( go_vertical ) {
# numeric items are plotted horizontally (because that's what usually expected)
# categorical items are plotted vertically because we can use the screen real estate better this way
if (is.null(choices) ||
dplyr::n_distinct(item_nomiss) > length(non_missing_choices)) {
non_missing_choices <- unique(item_nomiss)
names(non_missing_choices) <- non_missing_choices
}
choice_multiplier <- old_height/6.5
new_height <- 2 + choice_multiplier * length(non_missing_choices)
new_height <- ifelse(new_height > 20, 20, new_height)
new_height <- ifelse(new_height < 1, 1, new_height)
if(could_disclose_unique_values(item_nomiss) && is.character(item_nomiss)) {
new_height <- old_height
}
knitr::opts_chunk$set(fig.height = new_height)
}
wrap_at <- knitr::opts_chunk$get("fig.width") * 10
# todo: if there are free-text choices mingled in with the pre-defined ones, don't show
# todo: show rare items if they are pre-defined
# todo: bin rare responses into "other category"
if (!length(item_nomiss)) {
cat("No non-missing values to show.")
} else if (!could_disclose_unique_values(item_nomiss)) {
plot_labelled(item_nomiss, item_name, wrap_at, go_vertical)
} else {
if (is.character(item_nomiss)) {
char_count <- stringr::str_count(item_nomiss)
attributes(char_count)$label <- item_label
plot_labelled(char_count,
item_name, wrap_at, FALSE, trans = "log1p", "characters")
} else {
cat(dplyr::n_distinct(item_nomiss), " unique, categorical values, so not shown.")
}
}
knitr::opts_chunk$set(fig.height = old_height)
0 missing values.
attributes(item) <- item_attributes
df = data.frame(item, stringsAsFactors = FALSE)
names(df) = html_item_name
escaped_table(codebook_table(df))
| name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
|---|---|---|---|---|---|---|---|---|---|---|
| Social support | numeric | 0 | 1 | 0 | 1.3 | 1.6 | 1.208814 | 0.2991914 | ▁▁▂▆▇ | NA |
if (show_missing_values) {
plot_labelled(missing_values, item_name, wrap_at)
}
if (!is.null(item_info)) {
# don't show choices again, if they're basically same thing as value labels
if (!is.null(choices) && !is.null(item_info$choices) &&
all(names(na.omit(choices)) == item_info$choices) &&
all(na.omit(choices) == names(item_info$choices))) {
item_info$choices <- NULL
}
item_info$label_parsed <-
item_info$choice_list <- item_info$study_id <- item_info$id <- NULL
pander::pander(item_info)
}
if (!is.null(choices) && length(choices) && length(choices) < 30) {
pander::pander(as.list(choices))
}
show_missing_values <- FALSE
if (has_labels(item)) {
missing_values <- item[is.na(haven::zap_missing(item))]
attributes(missing_values) <- attributes(item)
if (!is.null(attributes(item)$labels)) {
attributes(missing_values)$labels <- attributes(missing_values)$labels[is.na(attributes(missing_values)$labels)]
attributes(item)$labels <- attributes(item)$labels[!is.na(attributes(item)$labels)]
}
if (is.double(item)) {
show_missing_values <- length(unique(haven::na_tag(missing_values))) > 1
item <- haven::zap_missing(item)
}
if (length(item_attributes$labels) == 0 && is.numeric(item)) {
item <- haven::zap_labels(item)
}
}
item_nomiss <- item[!is.na(item)]
# unnest mc_multiple and so on
if (
is.character(item_nomiss) &&
any(stringr::str_detect(item_nomiss, stringr::fixed(", "))) &&
!is.null(item_info) &&
(exists("type", item_info) &&
any(stringr::str_detect(item_info$type,
pattern = stringr::fixed("multiple"))))
) {
item_nomiss <- unlist(stringr::str_split(item_nomiss, pattern = stringr::fixed(", ")))
}
attributes(item_nomiss) <- attributes(item)
old_height <- knitr::opts_chunk$get("fig.height")
non_missing_choices <- item_attributes[["labels"]]
many_labels <- length(non_missing_choices) > 7
go_vertical <- !is_numeric_or_time_var(item_nomiss) || many_labels
if ( go_vertical ) {
# numeric items are plotted horizontally (because that's what usually expected)
# categorical items are plotted vertically because we can use the screen real estate better this way
if (is.null(choices) ||
dplyr::n_distinct(item_nomiss) > length(non_missing_choices)) {
non_missing_choices <- unique(item_nomiss)
names(non_missing_choices) <- non_missing_choices
}
choice_multiplier <- old_height/6.5
new_height <- 2 + choice_multiplier * length(non_missing_choices)
new_height <- ifelse(new_height > 20, 20, new_height)
new_height <- ifelse(new_height < 1, 1, new_height)
if(could_disclose_unique_values(item_nomiss) && is.character(item_nomiss)) {
new_height <- old_height
}
knitr::opts_chunk$set(fig.height = new_height)
}
wrap_at <- knitr::opts_chunk$get("fig.width") * 10
# todo: if there are free-text choices mingled in with the pre-defined ones, don't show
# todo: show rare items if they are pre-defined
# todo: bin rare responses into "other category"
if (!length(item_nomiss)) {
cat("No non-missing values to show.")
} else if (!could_disclose_unique_values(item_nomiss)) {
plot_labelled(item_nomiss, item_name, wrap_at, go_vertical)
} else {
if (is.character(item_nomiss)) {
char_count <- stringr::str_count(item_nomiss)
attributes(char_count)$label <- item_label
plot_labelled(char_count,
item_name, wrap_at, FALSE, trans = "log1p", "characters")
} else {
cat(dplyr::n_distinct(item_nomiss), " unique, categorical values, so not shown.")
}
}
knitr::opts_chunk$set(fig.height = old_height)
0 missing values.
attributes(item) <- item_attributes
df = data.frame(item, stringsAsFactors = FALSE)
names(df) = html_item_name
escaped_table(codebook_table(df))
| name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
|---|---|---|---|---|---|---|---|---|---|---|
| Healthy life expectancy | numeric | 0 | 1 | 0 | 0.79 | 1.1 | 0.7252436 | 0.242124 | ▁▃▃▇▅ | NA |
if (show_missing_values) {
plot_labelled(missing_values, item_name, wrap_at)
}
if (!is.null(item_info)) {
# don't show choices again, if they're basically same thing as value labels
if (!is.null(choices) && !is.null(item_info$choices) &&
all(names(na.omit(choices)) == item_info$choices) &&
all(na.omit(choices) == names(item_info$choices))) {
item_info$choices <- NULL
}
item_info$label_parsed <-
item_info$choice_list <- item_info$study_id <- item_info$id <- NULL
pander::pander(item_info)
}
if (!is.null(choices) && length(choices) && length(choices) < 30) {
pander::pander(as.list(choices))
}
show_missing_values <- FALSE
if (has_labels(item)) {
missing_values <- item[is.na(haven::zap_missing(item))]
attributes(missing_values) <- attributes(item)
if (!is.null(attributes(item)$labels)) {
attributes(missing_values)$labels <- attributes(missing_values)$labels[is.na(attributes(missing_values)$labels)]
attributes(item)$labels <- attributes(item)$labels[!is.na(attributes(item)$labels)]
}
if (is.double(item)) {
show_missing_values <- length(unique(haven::na_tag(missing_values))) > 1
item <- haven::zap_missing(item)
}
if (length(item_attributes$labels) == 0 && is.numeric(item)) {
item <- haven::zap_labels(item)
}
}
item_nomiss <- item[!is.na(item)]
# unnest mc_multiple and so on
if (
is.character(item_nomiss) &&
any(stringr::str_detect(item_nomiss, stringr::fixed(", "))) &&
!is.null(item_info) &&
(exists("type", item_info) &&
any(stringr::str_detect(item_info$type,
pattern = stringr::fixed("multiple"))))
) {
item_nomiss <- unlist(stringr::str_split(item_nomiss, pattern = stringr::fixed(", ")))
}
attributes(item_nomiss) <- attributes(item)
old_height <- knitr::opts_chunk$get("fig.height")
non_missing_choices <- item_attributes[["labels"]]
many_labels <- length(non_missing_choices) > 7
go_vertical <- !is_numeric_or_time_var(item_nomiss) || many_labels
if ( go_vertical ) {
# numeric items are plotted horizontally (because that's what usually expected)
# categorical items are plotted vertically because we can use the screen real estate better this way
if (is.null(choices) ||
dplyr::n_distinct(item_nomiss) > length(non_missing_choices)) {
non_missing_choices <- unique(item_nomiss)
names(non_missing_choices) <- non_missing_choices
}
choice_multiplier <- old_height/6.5
new_height <- 2 + choice_multiplier * length(non_missing_choices)
new_height <- ifelse(new_height > 20, 20, new_height)
new_height <- ifelse(new_height < 1, 1, new_height)
if(could_disclose_unique_values(item_nomiss) && is.character(item_nomiss)) {
new_height <- old_height
}
knitr::opts_chunk$set(fig.height = new_height)
}
wrap_at <- knitr::opts_chunk$get("fig.width") * 10
# todo: if there are free-text choices mingled in with the pre-defined ones, don't show
# todo: show rare items if they are pre-defined
# todo: bin rare responses into "other category"
if (!length(item_nomiss)) {
cat("No non-missing values to show.")
} else if (!could_disclose_unique_values(item_nomiss)) {
plot_labelled(item_nomiss, item_name, wrap_at, go_vertical)
} else {
if (is.character(item_nomiss)) {
char_count <- stringr::str_count(item_nomiss)
attributes(char_count)$label <- item_label
plot_labelled(char_count,
item_name, wrap_at, FALSE, trans = "log1p", "characters")
} else {
cat(dplyr::n_distinct(item_nomiss), " unique, categorical values, so not shown.")
}
}
knitr::opts_chunk$set(fig.height = old_height)
0 missing values.
attributes(item) <- item_attributes
df = data.frame(item, stringsAsFactors = FALSE)
names(df) = html_item_name
escaped_table(codebook_table(df))
| name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
|---|---|---|---|---|---|---|---|---|---|---|
| Freedom to make life choices | numeric | 0 | 1 | 0 | 0.42 | 0.63 | 0.3925705 | 0.1432895 | ▁▃▆▇▆ | NA |
if (show_missing_values) {
plot_labelled(missing_values, item_name, wrap_at)
}
if (!is.null(item_info)) {
# don't show choices again, if they're basically same thing as value labels
if (!is.null(choices) && !is.null(item_info$choices) &&
all(names(na.omit(choices)) == item_info$choices) &&
all(na.omit(choices) == names(item_info$choices))) {
item_info$choices <- NULL
}
item_info$label_parsed <-
item_info$choice_list <- item_info$study_id <- item_info$id <- NULL
pander::pander(item_info)
}
if (!is.null(choices) && length(choices) && length(choices) < 30) {
pander::pander(as.list(choices))
}
show_missing_values <- FALSE
if (has_labels(item)) {
missing_values <- item[is.na(haven::zap_missing(item))]
attributes(missing_values) <- attributes(item)
if (!is.null(attributes(item)$labels)) {
attributes(missing_values)$labels <- attributes(missing_values)$labels[is.na(attributes(missing_values)$labels)]
attributes(item)$labels <- attributes(item)$labels[!is.na(attributes(item)$labels)]
}
if (is.double(item)) {
show_missing_values <- length(unique(haven::na_tag(missing_values))) > 1
item <- haven::zap_missing(item)
}
if (length(item_attributes$labels) == 0 && is.numeric(item)) {
item <- haven::zap_labels(item)
}
}
item_nomiss <- item[!is.na(item)]
# unnest mc_multiple and so on
if (
is.character(item_nomiss) &&
any(stringr::str_detect(item_nomiss, stringr::fixed(", "))) &&
!is.null(item_info) &&
(exists("type", item_info) &&
any(stringr::str_detect(item_info$type,
pattern = stringr::fixed("multiple"))))
) {
item_nomiss <- unlist(stringr::str_split(item_nomiss, pattern = stringr::fixed(", ")))
}
attributes(item_nomiss) <- attributes(item)
old_height <- knitr::opts_chunk$get("fig.height")
non_missing_choices <- item_attributes[["labels"]]
many_labels <- length(non_missing_choices) > 7
go_vertical <- !is_numeric_or_time_var(item_nomiss) || many_labels
if ( go_vertical ) {
# numeric items are plotted horizontally (because that's what usually expected)
# categorical items are plotted vertically because we can use the screen real estate better this way
if (is.null(choices) ||
dplyr::n_distinct(item_nomiss) > length(non_missing_choices)) {
non_missing_choices <- unique(item_nomiss)
names(non_missing_choices) <- non_missing_choices
}
choice_multiplier <- old_height/6.5
new_height <- 2 + choice_multiplier * length(non_missing_choices)
new_height <- ifelse(new_height > 20, 20, new_height)
new_height <- ifelse(new_height < 1, 1, new_height)
if(could_disclose_unique_values(item_nomiss) && is.character(item_nomiss)) {
new_height <- old_height
}
knitr::opts_chunk$set(fig.height = new_height)
}
wrap_at <- knitr::opts_chunk$get("fig.width") * 10
# todo: if there are free-text choices mingled in with the pre-defined ones, don't show
# todo: show rare items if they are pre-defined
# todo: bin rare responses into "other category"
if (!length(item_nomiss)) {
cat("No non-missing values to show.")
} else if (!could_disclose_unique_values(item_nomiss)) {
plot_labelled(item_nomiss, item_name, wrap_at, go_vertical)
} else {
if (is.character(item_nomiss)) {
char_count <- stringr::str_count(item_nomiss)
attributes(char_count)$label <- item_label
plot_labelled(char_count,
item_name, wrap_at, FALSE, trans = "log1p", "characters")
} else {
cat(dplyr::n_distinct(item_nomiss), " unique, categorical values, so not shown.")
}
}
knitr::opts_chunk$set(fig.height = old_height)
0 missing values.
attributes(item) <- item_attributes
df = data.frame(item, stringsAsFactors = FALSE)
names(df) = html_item_name
escaped_table(codebook_table(df))
| name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
|---|---|---|---|---|---|---|---|---|---|---|
| Generosity | numeric | 0 | 1 | 0 | 0.18 | 0.57 | 0.1848462 | 0.0952544 | ▆▇▆▁▁ | NA |
if (show_missing_values) {
plot_labelled(missing_values, item_name, wrap_at)
}
if (!is.null(item_info)) {
# don't show choices again, if they're basically same thing as value labels
if (!is.null(choices) && !is.null(item_info$choices) &&
all(names(na.omit(choices)) == item_info$choices) &&
all(na.omit(choices) == names(item_info$choices))) {
item_info$choices <- NULL
}
item_info$label_parsed <-
item_info$choice_list <- item_info$study_id <- item_info$id <- NULL
pander::pander(item_info)
}
if (!is.null(choices) && length(choices) && length(choices) < 30) {
pander::pander(as.list(choices))
}
show_missing_values <- FALSE
if (has_labels(item)) {
missing_values <- item[is.na(haven::zap_missing(item))]
attributes(missing_values) <- attributes(item)
if (!is.null(attributes(item)$labels)) {
attributes(missing_values)$labels <- attributes(missing_values)$labels[is.na(attributes(missing_values)$labels)]
attributes(item)$labels <- attributes(item)$labels[!is.na(attributes(item)$labels)]
}
if (is.double(item)) {
show_missing_values <- length(unique(haven::na_tag(missing_values))) > 1
item <- haven::zap_missing(item)
}
if (length(item_attributes$labels) == 0 && is.numeric(item)) {
item <- haven::zap_labels(item)
}
}
item_nomiss <- item[!is.na(item)]
# unnest mc_multiple and so on
if (
is.character(item_nomiss) &&
any(stringr::str_detect(item_nomiss, stringr::fixed(", "))) &&
!is.null(item_info) &&
(exists("type", item_info) &&
any(stringr::str_detect(item_info$type,
pattern = stringr::fixed("multiple"))))
) {
item_nomiss <- unlist(stringr::str_split(item_nomiss, pattern = stringr::fixed(", ")))
}
attributes(item_nomiss) <- attributes(item)
old_height <- knitr::opts_chunk$get("fig.height")
non_missing_choices <- item_attributes[["labels"]]
many_labels <- length(non_missing_choices) > 7
go_vertical <- !is_numeric_or_time_var(item_nomiss) || many_labels
if ( go_vertical ) {
# numeric items are plotted horizontally (because that's what usually expected)
# categorical items are plotted vertically because we can use the screen real estate better this way
if (is.null(choices) ||
dplyr::n_distinct(item_nomiss) > length(non_missing_choices)) {
non_missing_choices <- unique(item_nomiss)
names(non_missing_choices) <- non_missing_choices
}
choice_multiplier <- old_height/6.5
new_height <- 2 + choice_multiplier * length(non_missing_choices)
new_height <- ifelse(new_height > 20, 20, new_height)
new_height <- ifelse(new_height < 1, 1, new_height)
if(could_disclose_unique_values(item_nomiss) && is.character(item_nomiss)) {
new_height <- old_height
}
knitr::opts_chunk$set(fig.height = new_height)
}
wrap_at <- knitr::opts_chunk$get("fig.width") * 10
# todo: if there are free-text choices mingled in with the pre-defined ones, don't show
# todo: show rare items if they are pre-defined
# todo: bin rare responses into "other category"
if (!length(item_nomiss)) {
cat("No non-missing values to show.")
} else if (!could_disclose_unique_values(item_nomiss)) {
plot_labelled(item_nomiss, item_name, wrap_at, go_vertical)
} else {
if (is.character(item_nomiss)) {
char_count <- stringr::str_count(item_nomiss)
attributes(char_count)$label <- item_label
plot_labelled(char_count,
item_name, wrap_at, FALSE, trans = "log1p", "characters")
} else {
cat(dplyr::n_distinct(item_nomiss), " unique, categorical values, so not shown.")
}
}
knitr::opts_chunk$set(fig.height = old_height)
0 missing values.
attributes(item) <- item_attributes
df = data.frame(item, stringsAsFactors = FALSE)
names(df) = html_item_name
escaped_table(codebook_table(df))
| name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
|---|---|---|---|---|---|---|---|---|---|---|
| Perceptions of corruption | numeric | 0 | 1 | 0 | 0.085 | 0.45 | 0.1106026 | 0.0945378 | ▇▅▁▁▁ | NA |
if (show_missing_values) {
plot_labelled(missing_values, item_name, wrap_at)
}
if (!is.null(item_info)) {
# don't show choices again, if they're basically same thing as value labels
if (!is.null(choices) && !is.null(item_info$choices) &&
all(names(na.omit(choices)) == item_info$choices) &&
all(na.omit(choices) == names(item_info$choices))) {
item_info$choices <- NULL
}
item_info$label_parsed <-
item_info$choice_list <- item_info$study_id <- item_info$id <- NULL
pander::pander(item_info)
}
if (!is.null(choices) && length(choices) && length(choices) < 30) {
pander::pander(as.list(choices))
}
missingness_report
if (length(md_pattern)) {
if (knitr::is_html_output()) {
rmarkdown::paged_table(md_pattern, options = list(rows.print = 10))
} else {
knitr::kable(md_pattern)
}
}
items
export_table(metadata_table)
jsonld
JSON-LD metadata
The following JSON-LD can be found by search engines, if you share this codebook publicly on the web.
{
"name": "World Happiness reports data",
"datePublished": "2020-06-01",
"description": "The dataset has N=156 rows and 9 columns.\n156 rows have no missing values on any column.\n\n\n## Table of variables\nThis table contains variable names, labels, and number of missing values.\nSee the complete codebook for more.\n\n|name |label | n_missing|\n|:----------------------------|:-----|---------:|\n|Overall rank |NA | 0|\n|Country or region |NA | 0|\n|Score |NA | 0|\n|GDP per capita |NA | 0|\n|Social support |NA | 0|\n|Healthy life expectancy |NA | 0|\n|Freedom to make life choices |NA | 0|\n|Generosity |NA | 0|\n|Perceptions of corruption |NA | 0|\n\n### Note\nThis dataset was automatically described using the [codebook R package](https://rubenarslan.github.io/codebook/) (version 0.8.2).",
"keywords": ["Overall rank", "Country or region", "Score", "GDP per capita", "Social support", "Healthy life expectancy", "Freedom to make life choices", "Generosity", "Perceptions of corruption"],
"@context": "http://schema.org/",
"@type": "Dataset",
"variableMeasured": [
{
"name": "Overall rank",
"@type": "propertyValue"
},
{
"name": "Country or region",
"@type": "propertyValue"
},
{
"name": "Score",
"@type": "propertyValue"
},
{
"name": "GDP per capita",
"@type": "propertyValue"
},
{
"name": "Social support",
"@type": "propertyValue"
},
{
"name": "Healthy life expectancy",
"@type": "propertyValue"
},
{
"name": "Freedom to make life choices",
"@type": "propertyValue"
},
{
"name": "Generosity",
"@type": "propertyValue"
},
{
"name": "Perceptions of corruption",
"@type": "propertyValue"
}
]
}`